15. Mapping Schools

https://data.cityofnewyork.us/Education/2019-2020-School-Locations/wg9x-4ke6

[6]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import geopandas as gpd
import folium
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Markdown as md

from nycschools import schools, exams, ui


from IPython.display import display, HTML
# display(HTML("<style>.container { width:100% !important; }</style>"))
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
[7]:
# Load the school demographics table provided by the nycschools package.
demo = schools.load_school_demographics()

# Distinct school identifiers (DBNs) found in the demographics data.
dbns = demo["dbn"].unique()

# Show which columns are available.
demo.columns

[7]:
Index(['dbn', 'beds', 'district', 'geo_district', 'boro', 'school_name',
       'short_name', 'ay', 'year', 'total_enrollment',
       'grade_3k_pk_half_day_full', 'grade_k', 'grade_1', 'grade_2', 'grade_3',
       'grade_4', 'grade_5', 'grade_6', 'grade_7', 'grade_8', 'grade_9',
       'grade_10', 'grade_11', 'grade_12', 'female_n', 'female_pct', 'male_n',
       'male_pct', 'asian_n', 'asian_pct', 'black_n', 'black_pct',
       'hispanic_n', 'hispanic_pct', 'multi_racial_n', 'multi_racial_pct',
       'native_american_n', 'native_american_pct', 'white_n', 'white_pct',
       'missing_race_ethnicity_data_n', 'missing_race_ethnicity_data_pct',
       'swd_n', 'swd_pct', 'ell_n', 'ell_pct', 'poverty_n', 'poverty_pct',
       'eni_pct', 'clean_name', 'zip'],
      dtype='object')
[12]:
# School location records, downloaded as GeoJSON from NYC Open Data.
geojsonurl = "https://data.cityofnewyork.us/resource/a3nt-yts4.geojson?$limit=1000000"
df = gpd.read_file(geojsonurl)

df = df.rename(columns={"xcoordinat": "x", "ycoordinat": "y"})
# Values that cannot be parsed become NaN; NaN then fails the `> 0` filter below.
df["x"] = pd.to_numeric(df["x"], errors="coerce")
df["y"] = pd.to_numeric(df["y"], errors="coerce")
# Take an explicit copy of the filtered rows: adding a column to a boolean-filtered
# slice is the pattern that triggers pandas' SettingWithCopyWarning.
df = df[df["x"] > 0].copy()
df["dbn"] = df["ats_code"]

# Keep only the location columns; everything else comes from the second data set.
df = df[["x", "y", "dbn", "zip", "loc_name"]]

# 2019-2020 School Locations (CSV) from NYC Open Data.
url = "https://data.cityofnewyork.us/resource/wg9x-4ke6.csv?$limit=1000000"
loc2 = pd.read_csv(url)
# `system_code` holds the identifier that is matched against `dbn` in the merge below.
loc2["dbn"] = loc2["system_code"]
cols = [
    'dbn',
    'administrative_district_code',
    'administrative_district_name',
    'beds',
    'borough_block_lot',
    'census_tract',
    'community_district',
    'community_district_1',
    'community_school_sup_name',
    'council_district',
    'fax_number',
    'fiscal_year',
    'geographical_district_code',
    'grades_final_text',
    'grades_text',
    'highschool_network',
    'highschool_network_location',
    'highschool_network_name',
    'latitude',
    'location_category_description',
    'location_code',
    'location_name',
    'location_type_description',
    'longitude',
    'managed_by_name',
    'nta',
    'nta_name',
    'open_date',
    'police_precinct',
    'primary_building_code',
    'principal_name',
    'principal_phone_number',
    'principal_title',
    'state_code',
    'status_descriptions']

loc2 = loc2[cols]

# Left merge: every row of the GeoJSON-derived frame is kept, with the CSV details
# attached where the dbn matches.
df = df.merge(loc2, on="dbn", how="left")
df.open_date
[12]:
0       1999-07-01T00:00:00.000
1       1898-07-01 00:00:00.000
2       1904-07-01T00:00:00.000
3       1972-07-01T00:00:00.000
4       2011-07-01T00:00:00.000
                 ...
2099    1900-07-01T00:00:00.000
2100    2000-07-01T00:00:00.000
2101    1961-07-01T00:00:00.000
2102    2011-07-01T00:00:00.000
2103    1927-07-01T00:00:00.000
Name: open_date, Length: 2104, dtype: object
[11]:


# DBNs present in each source, kept for ad-hoc comparison of the two sets.
a = set(df.dbn)
b = set(loc2.dbn)

# Re-merge starting from the five location-only columns. `df` already contains
# loc2's columns from the earlier left merge, so merging loc2 into it again would
# duplicate every shared column with `_x`/`_y` suffixes and leave no plain
# `longitude`/`latitude` columns for the cells that follow.
# how="right" keeps every school listed in loc2, even without a matching location row.
df = df[["x", "y", "dbn", "zip", "loc_name"]].merge(loc2, on="dbn", how="right")
df.columns
[11]:
Index(['x', 'y', 'dbn', 'zip', 'loc_name', 'administrative_district_code_x',
       'administrative_district_name_x', 'beds_x', 'borough_block_lot_x',
       'census_tract_x', 'community_district_x', 'community_district_1_x',
       'community_school_sup_name_x', 'council_district_x', 'fax_number_x',
       'fiscal_year_x', 'geographical_district_code_x', 'grades_final_text_x',
       'grades_text_x', 'highschool_network_x',
       'highschool_network_location_x', 'highschool_network_name_x',
       'latitude_x', 'location_category_description_x', 'location_code_x',
       'location_name_x', 'location_type_description_x', 'longitude_x',
       'managed_by_name_x', 'nta_x', 'nta_name_x', 'open_date_x',
       'police_precinct_x', 'primary_building_code_x', 'principal_name_x',
       'principal_phone_number_x', 'principal_title_x', 'state_code_x',
       'status_descriptions_x', 'administrative_district_code_y',
       'administrative_district_name_y', 'beds_y', 'borough_block_lot_y',
       'census_tract_y', 'community_district_y', 'community_district_1_y',
       'community_school_sup_name_y', 'council_district_y', 'fax_number_y',
       'fiscal_year_y', 'geographical_district_code_y', 'grades_final_text_y',
       'grades_text_y', 'highschool_network_y',
       'highschool_network_location_y', 'highschool_network_name_y',
       'latitude_y', 'location_category_description_y', 'location_code_y',
       'location_name_y', 'location_type_description_y', 'longitude_y',
       'managed_by_name_y', 'nta_y', 'nta_name_y', 'open_date_y',
       'police_precinct_y', 'primary_building_code_y', 'principal_name_y',
       'principal_phone_number_y', 'principal_title_y', 'state_code_y',
       'status_descriptions_y'],
      dtype='object')
[ ]:
# Keep only the schools that also appear in the demographics table.
# This is a filter rather than an inner merge: a merge would copy every demographic
# column (and one row per school-year) into `df`, would add them again each time the
# cell is re-run, and would collide with the later cell that joins `demo` itself.
# `.copy()` makes the result an independent frame so later column assignments are safe.
df = df[df["dbn"].isin(demo["dbn"])].copy()

# Demographic rows whose DBN has no matching location record.
demo[~demo["dbn"].isin(df["dbn"])]
dbn beds district boro school_name short_name ay year total_enrollment grade_3k_pk_half_day_full ... missing_race_ethnicity_data_n missing_race_ethnicity_data_pct swd_n swd_pct ell_n ell_pct poverty_n poverty_pct eni_pct clean_name
9142 84X489 320900861084 84 Bronx South Bronx Classical Charter School III PS 489 2016 2016-17 160 0 ... 0 0.000000 16 0.100000 30 0.188 142 0.887 0.783 south bronx classical charter school iii
9143 84X489 320900861084 84 Bronx South Bronx Classical Charter School III PS 489 2017 2017-18 275 0 ... 1 0.003636 24 0.087273 56 0.204 242 0.880 0.884 south bronx classical charter school iii
9144 84X489 320900861084 84 Bronx South Bronx Classical Charter School III PS 489 2018 2018-19 316 0 ... 1 0.003165 34 0.107595 66 0.209 281 0.889 0.877 south bronx classical charter school iii
9145 84X489 320900861084 84 Bronx South Bronx Classical Charter School III PS 489 2019 2019-20 323 0 ... 0 0.000000 33 0.102167 59 0.183 291 0.901 0.875 south bronx classical charter school iii
9146 84X489 320900861084 84 Bronx South Bronx Classical Charter School III PS 489 2020 2020-21 430 0 ... 1 0.002326 47 0.109302 91 0.212 401 0.933 0.865 south bronx classical charter school iii
9219 84X588 320900861122 84 Bronx South Bronx Classical Charter School IV NA 588 2017 2017-18 129 0 ... 0 0.000000 14 0.108527 27 0.209 120 0.930 0.913 south bronx classical charter school iv
9220 84X588 320900861122 84 Bronx South Bronx Classical Charter School IV PS 588 2018 2018-19 154 0 ... 0 0.000000 17 0.110390 44 0.286 144 0.935 0.930 south bronx classical charter school iv
9221 84X588 320900861122 84 Bronx South Bronx Classical Charter School IV PS 588 2019 2019-20 164 0 ... 0 0.000000 25 0.152439 47 0.287 158 0.960 0.920 south bronx classical charter school iv
9222 84X588 320900861122 84 Bronx South Bronx Classical Charter School IV PS 588 2020 2020-21 208 0 ... 3 0.014423 26 0.125000 54 0.260 169 0.813 0.852 south bronx classical charter school iv

9 rows × 49 columns

[5]:
# District boundaries, read as GeoJSON from the NYC Open Data export link.
districts = gpd.read_file("https://data.cityofnewyork.us/api/geospatial/r8nu-ymqj?method=export&format=GeoJSON")

# Replace the source column names with shorter ones and make the district id numeric.
districts.columns = ['district', 'area', 'length', 'geometry']
districts["district"] = pd.to_numeric(districts["district"], errors='coerce', downcast='integer')

# Draw the district polygons, then overlay `df` in red on the same axes.
fig, ax = plt.subplots(figsize=(16, 16))
districts.plot(ax=ax)
# NOTE(review): `df` is assembled with plain column selections and merges above;
# confirm it is still a GeoDataFrame here, otherwise this call draws a pandas
# line plot instead of school points.
df.plot(ax=ax, color="red")
[5]:
<AxesSubplot:>
../../_images/nb__archive_mapping-schools_6_1.png
[ ]:
# Standardise names: the school id column is `dbn`, and x/y hold longitude/latitude.
df = df.rename(columns={"system_code": "dbn"})
df = df.assign(x=df["longitude"], y=df["latitude"])

# Keep only rows where both coordinates are present.
df = df.dropna(subset=["x", "y"])
[ ]:

# Build point geometries from the x/y columns and wrap `df` as a GeoDataFrame
# tagged with the EPSG:4326 coordinate reference system.
geo = gpd.points_from_xy(x=df["x"], y=df["y"])
gdf = gpd.GeoDataFrame(df, geometry=geo, crs="EPSG:4326")

# These two expressions are not the last line of the cell, so Jupyter does not
# display them; they only fail loudly if the referenced columns are missing.
gdf[["dbn", "location_name", "geometry"]]
df.columns

# Render the interactive map of school points.
gdf.explore()
Make this Notebook Trusted to load map: File -> Trust Notebook
[ ]:
# Join the demographic data onto the geographic data by dbn (left join: every row
# of `gdf` is kept).
# Both frames carry columns with the same name (for example `beds`), and
# DataFrame.join raises ValueError on overlapping names unless a suffix is given.
# Columns coming from `demo` that clash are therefore renamed with a `_demo` suffix;
# when nothing clashes the result is the same as a plain join.
school_geo = gdf.set_index("dbn").join(demo.set_index("dbn"), rsuffix="_demo")
school_geo = school_geo.reset_index()

# Read the district boundaries GeoJSON directly from the download link.
districts = gpd.read_file("https://data.cityofnewyork.us/api/geospatial/r8nu-ymqj?method=export&format=GeoJSON")
# Rename the columns and make the district id numeric.
districts.columns = ['district', 'area', 'length', 'geometry']
districts.district = pd.to_numeric(districts.district, downcast='integer', errors='coerce')
school_geo.columns
Index(['dbn', 'fiscal_year', 'location_code', 'location_name', 'beds',
       'managed_by_name', 'location_type_description',
       'location_category_description', 'grades_text', 'grades_final_text',
       'open_date', 'status_descriptions', 'primary_building_code',
       'primary_address_line_1', 'state_code', 'x_coordinate', 'y_coordinate',
       'longitude', 'latitude', 'community_district', 'council_district',
       'census_tract', 'borough_block_lot', 'nta', 'nta_name',
       'principal_name', 'principal_title', 'principal_phone_number',
       'fax_number', 'geographical_district_code',
       'administrative_district_code', 'administrative_district_name',
       'community_school_sup_name', 'tier_3_support_location_name',
       'tier_3_support_leader_name', 'tier_2_support_location_name',
       'highschool_network_location', 'highschool_network_name',
       'highschool_network', 'community_district_1', 'police_precinct', 'x',
       'y', 'geometry', 'district', 'boro', 'school_name', 'short_name', 'ay',
       'year', 'total_enrollment', 'grade_3k_pk_half_day_full', 'grade_k',
       'grade_1', 'grade_2', 'grade_3', 'grade_4', 'grade_5', 'grade_6',
       'grade_7', 'grade_8', 'grade_9', 'grade_10', 'grade_11', 'grade_12',
       'female_n', 'female_pct', 'male_n', 'male_pct', 'asian_n', 'asian_pct',
       'black_n', 'black_pct', 'hispanic_n', 'hispanic_pct', 'multi_racial_n',
       'multi_racial_pct', 'native_american_n', 'native_american_pct',
       'white_n', 'white_pct', 'missing_race_ethnicity_data_n',
       'missing_race_ethnicity_data_pct', 'swd_n', 'swd_pct', 'ell_n',
       'ell_pct', 'poverty_n', 'poverty_pct', 'eni_pct', 'clean_name'],
      dtype='object')
[ ]:
# Static overview: district polygons with each school drawn as a red dot on top.
fig, ax = plt.subplots(figsize=(16, 16))
districts.plot(ax=ax)
ax.scatter(school_geo["x"], school_geo["y"], color="red")



<matplotlib.collections.PathCollection at 0x7f3ae0c8c1f0>
../../_images/nb__archive_mapping-schools_10_1.png
[ ]:
# Reproject the district polygons to the EPSG:4326 coordinate reference system.
districts = districts.to_crs(epsg=4326)

# Interactive map of the districts, coloured by the `district` column (a choropleth).
district_map = districts.explore(
    column="district",
    tooltip="district",               # hovering shows the district number
    popup=False,                      # clicking a district shows nothing
    tiles="CartoDB positron",         # base map tiles
    cmap="tab20b",                    # matplotlib colormap
    style_kwds={"color": "black"},    # black outlines
)
district_map
Make this Notebook Trusted to load map: File -> Trust Notebook
[ ]:
def school_pop(row):
    """Build the HTML snippet shown in a school's map popup.

    Parameters
    ----------
    row : object with attributes ``dbn``, ``district``, ``school_name``,
        ``total_enrollment`` and the fractional columns ``poverty_pct``,
        ``asian_pct``, ``black_pct``, ``hispanic_pct``, ``white_pct``
        (for example one row passed by ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    str
        A ``<div>`` with one line per field. The ``*_pct`` values are formatted
        as percentages with one decimal place.
    """
    # Imported locally because `html` is not among the notebook's top-level imports.
    from html import escape

    def text(value):
        # Free-text fields come straight from the data set; escape &, <, > and
        # quotes so a value such as a school name cannot break the popup markup.
        return escape(str(value))

    return f"""
<div style="min-width: 200px">
dbn: {text(row.dbn)}<br>
district: {text(row.district)}<br>
name: {text(row.school_name)}<br>
size: {text(row.total_enrollment)}<br>
pct poverty: {row.poverty_pct:.1%}<br>
pct Asian: {row.asian_pct:.1%}<br>
pct Black: {row.black_pct:.1%}<br>
pct Hispanic: {row.hispanic_pct:.1%}<br>
pct White: {row.white_pct:.1%}
</div>"""


def dist_map(x):
    """Pick a marker colour for a school from its district number.

    Districts below 33 share the first palette entry, district 84 gets the
    second, district 75 the third, and everything else the fourth. The palette
    is whatever ``ui.hexmap`` returns for matplotlib's "tab10" colormap.
    """
    palette = ui.hexmap(plt.get_cmap("tab10"))
    if x < 33:
        slot = 0
    elif x == 84:
        slot = 1
    elif x == 75:
        slot = 2
    else:
        slot = 3
    return palette(slot)


# One colour per row, chosen from the school's district number.
school_geo["district_color"] = school_geo["district"].apply(dist_map)
# Pre-render the popup HTML for every row.
school_geo["school info"] = school_geo.apply(school_pop, axis=1)
school_geo = school_geo.to_crs(epsg=4326)

# Overlay the schools on the existing district map; clicking a point opens its popup.
school_geo.explore(
    m=district_map,
    color="district_color",
    popup="school info",
    tooltip=False,
)


Make this Notebook Trusted to load map: File -> Trust Notebook
[ ]:
import math

# Base map containing only district 24; the school circles are added to it below.
district_24 = districts[districts["district"] == 24]
m = district_24.explore(
    column="district",                # colour by the district column (choropleth)
    popup="district",                 # clicking the polygon shows the district number
    tooltip=False,
    tiles="CartoDB positron",         # base map tiles
    cmap="tab20b",                    # matplotlib colormap
    style_kwds={"color": "black"},    # black outline
)


def school_info(row):
    """Return a folium popup describing one school.

    The popup body is the HTML produced by ``school_pop(row)``, so the two
    functions cannot drift apart the way two copies of the same template would.

    Parameters
    ----------
    row : a school row exposing the attributes that ``school_pop`` reads.

    Returns
    -------
    folium.map.Popup
    """
    return folium.map.Popup(school_pop(row))


def map_district_color(x):
    """Return the marker colour for district number ``x``.

    Delegates to ``dist_map`` so the district-to-colour rule is defined in one
    place instead of being repeated here line for line.
    """
    return dist_map(x)

# Store each school's district-based colour alongside its other columns.
school_geo["school_color"] = school_geo["district"].apply(map_district_color)

def map_school(row):
    """Add a circle for one school to the module-level map ``m``.

    The circle is placed at the row's latitude/longitude, coloured by district,
    shows the dbn as its tooltip and the ``school_info`` popup on click. Its
    radius grows with the logarithm of the poverty rate.

    Parameters
    ----------
    row : a school row exposing ``poverty_pct``, ``latitude``, ``longitude``,
        ``dbn``, ``district`` and the attributes read by ``school_info``.
    """
    # 50 * ln(10 * poverty_pct) is only non-negative from 10% poverty upwards:
    # math.log raises ValueError at 0 and the value is negative just above it.
    # Clamp those cases (and NaN, which fails the comparison) to a radius of 0.
    scaled = row.poverty_pct * 10
    radius = 50 * math.log(scaled) if scaled > 1 else 0.0

    folium.Circle(
        radius=radius,
        location=[row.latitude, row.longitude],
        tooltip=row.dbn,
        popup=school_info(row),
        color=map_district_color(row.district),
        fill=True,
    ).add_to(m)


# Add a circle to the map for every school row in district 24.
school_geo.loc[school_geo["district"] == 24].apply(map_school, axis=1)

# Display the finished map.
m
/home/mxc/.virtualenvs/school-data/lib/python3.10/site-packages/mapclassify/classifiers.py:891: RuntimeWarning: invalid value encountered in double_scalars
  gadf = 1 - self.adcm / adam
Make this Notebook Trusted to load map: File -> Trust Notebook
[ ]:
import math
# Scratch check of the natural logarithm, the function map_school uses to size
# its circles: ln(4) is about 1.386.
math.log(4)
1.3862943611198906